Loading packages
Pulling the most recent data from covidtracking.com…
## [1] "Retrieved 2021-01-25 19:36:20"
## [1] "Data as of 2021-01-25"
Narrowing data…
Adding state population data…
Building tables…
done
Tests & cases
my_data %>%
inner_join(code_names, by = "code") %>%
inner_join(st_pop_2019, by = "name") %>%
filter(as_of >= Sys.Date() - days(7)) %>%
mutate(week_num = lubridate::week(as_of)) %>%
group_by(code) %>%
summarize(pos_rate = sum(case_ch) / sum(test_ch), reg_2 = min(reg_2)) %>%
mutate(pos_rate = if_else(pos_rate < 0, 0, pos_rate)) %>%
filter(pos_rate < 0.05 | pos_rate > 0.10) %>%
ggplot(mapping = aes(x = reorder(code, pos_rate), y = pos_rate, fill = reg_2)) +
geom_col() +
geom_label(mapping = aes(label = code)) +
labs(x = "Date", y = "Share of COVID-19 tests with positive results", title = "Positive rates remain high in the Midwest and plains", subtitle = str_c("Week ending ", max(my_data$as_of))) +
theme(legend.title = element_blank(), axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank())
## `summarise()` ungrouping output (override with `.groups` argument)
Tests & cases
my_data %>%
inner_join(code_names, by = "code") %>%
inner_join(st_pop_2019, by = "name") %>%
filter(as_of >= Sys.Date() - days(7)) %>%
mutate(case_days = if_else(between(as_of, Sys.Date() - days(6), Sys.Date() - days(6)), 1, 0),
death_days = if_else(as_of >= Sys.Date() - days(6), 1, 0)) %>%
group_by(code) %>%
summarize(cfr = sum(death_ch * death_days) / sum(case_ch * case_days), reg_2 = min(reg_2)) %>%
ggplot(mapping = aes(x = reorder(code, cfr), y = cfr, fill = reg_2)) +
geom_col() +
geom_label(mapping = aes(label = code)) +
labs(x = "Date", y = "Deaths as a share of confirmed cases", title = "Case fatality rate") +
theme(legend.title = element_blank(), axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank())
## `summarise()` ungrouping output (override with `.groups` argument)
Tests & cases
my_data %>%
inner_join(code_names, by = "code") %>%
inner_join(st_pop_2019, by = "name") %>%
filter(as_of >= Sys.Date() - weeks(4)) %>%
mutate(week_num = lubridate::week(as_of)) %>%
group_by(week_num, reg_2) %>%
summarize(as_of = median(as_of), pos_rate = sum(case_ch) / sum(test_ch)) %>%
ggplot(mapping = aes(x = as_of, y = pos_rate, color = reg_2, fill = reg_2)) +
geom_smooth(alpha = .05, span = .5) +
labs(x = "Date", y = "Share of COVID-19 tests with positive results", title = "Positive rates are trending upward", subtitle = str_c("Month ending ", max(my_data$as_of))) +
theme(legend.title = element_blank())
## `summarise()` regrouping output by 'week_num' (override with `.groups` argument)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Tests & cases
my_data %>%
inner_join(code_names, by = "code") %>%
inner_join(st_pop_2019, by = "name") %>%
filter(as_of >= Sys.Date() - months(1)) %>%
mutate(week_num = lubridate::week(as_of)) %>%
group_by(week_num, reg_2) %>%
summarize(as_of = median(as_of), pos_rate = sum(case_ch) / sum(test_ch), wk_test = sum(test_ch)) %>%
ggplot(mapping = aes(x = as_of, y = pos_rate, color = reg_2, fill = reg_2)) +
geom_smooth(span = .5) +
geom_line() +
labs(x = "Date", y = "Share of COVID-19 tests with positive results", title = "Positive rates remain high in the plains and midwest", subtitle = str_c("Two months ending ", max(my_data$as_of))) +
theme(legend.title = element_blank())
## `summarise()` regrouping output by 'week_num' (override with `.groups` argument)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Tests & cases
my_data %>%
inner_join(code_names, by = "code") %>%
inner_join(st_pop_2019, by = "name") %>%
filter(as_of >= Sys.Date() - months(1)) %>%
mutate(week_num = lubridate::week(as_of)) %>%
group_by(week_num, code) %>%
summarize(pos_rate = sum(case_ch) / sum(test_ch),
as_of = median(as_of),
reg_2 = mode(reg_2)) %>%
group_by(code, week_num) %>%
mutate(y_label = if_else(as_of == median(as_of, na.rm = TRUE),
median(pos_rate, na.rm = TRUE),
NULL),
month_pos = mean(pos_rate, na.rm = TRUE)) %>%
ungroup() %>%
ggplot(mapping = aes(x = as_of, y = pos_rate)) +
geom_line(alpha = 0, span = .6, mapping = aes(color = reg_2)) +
geom_label(mapping = aes(y = y_label, label = code), na.rm = TRUE) +
labs(x = "Date", y = "Share of COVID-19 tests with positive results", title = "Positive rates remain high in some states", subtitle = str_c(Sys.Date() - months(1), " through ", max(my_data$as_of), "; Ordered by highest positive rate for the month"))
## `summarise()` regrouping output by 'week_num' (override with `.groups` argument)
# +
# theme(legend.title = element_blank()) +
# facet_wrap(facets = vars(reorder(code, desc(month_pos))), nrow = 5) +
# theme(axis.text = element_blank(),
# axis.ticks = element_blank(),
# axis.title.x = element_blank(),
# strip.text.x = element_blank(),
# legend.position = 0)
## Warning: Removed 1 rows containing missing values (geom_label_repel).
Adds 2016 presidential election results
results_2016 <- read_csv("results_2016.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## code = col_character(),
## t_mgn = col_double()
## )
pk <- c("OK")
my_data %>%
filter(code %in% pk) %>%
mutate(case_ch = as.double(case_ch)) %>%
group_by(as_of) %>%
summarize(us_cases = sum(cases, na.rm = TRUE),
us_cases_ch = sum(case_ch, na.rm = TRUE),
us_deaths = sum(death, na.rm = TRUE),
us_deaths_ch = sum(death_ch, na.rm = TRUE)) %>%
filter(us_deaths > 9,
weekdays(as_of) %in% c("Sunday", "Tuesday")) %>%
mutate(since = min(as_of),
is_day = if_else(weekdays(as_of) == weekdays(my_data[[1,1]]), 0.3, 0.1)) %>%
ggplot(mapping = aes(x = as_of, y = us_cases_ch, color = weekdays(as_of))) +
geom_smooth(alpha = 0, span = 0.3) +
theme(legend.position = "none") +
labs(title = "Two states whose paths diverged in November", subtitle = str_c("New cases of COVID-19 per day since", min(my_data[death_ch > 9 & code == 'OK', as_of]), sep = " "), x = NULL, y = "New daily cases", caption = "Just kidding. The orange line connects Oklahoma case numbers reported\non Sundays and the blue is Oklahoma case numbers from Tuesdays") +
scale_color_manual(values = c("orange", "blue"))
## `summarise()` ungrouping output (override with `.groups` argument)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
mutate(noreast = if_else(code %in% (pk), "Northeastern states", "All other states")) %>%
group_by(noreast, as_of) %>%
summarize(us_cases = sum(cases, na.rm = TRUE) / sum(pop_2019, na.rm = TRUE) * 1000000,
us_cases_ch = sum(case_ch, na.rm = TRUE) / sum(pop_2019, na.rm = TRUE) * 1000000,
us_deaths = sum(death, na.rm = TRUE) / sum(pop_2019, na.rm = TRUE) * 1000000,
us_deaths_ch = sum(death_ch, na.rm = TRUE) / sum(pop_2019, na.rm = TRUE) * 1000000) %>%
# filter(us_cases > .00000009) %>%
mutate(since = min(as_of)) %>%
ggplot(mapping = aes(x = as_of, y = us_deaths_ch)) +
geom_smooth(linetype = 0, alpha = .0, mapping = aes(fill = weekdays(as_of), color = noreast), span = 0.8) +
geom_smooth(alpha = 0, size = 1, linetype = 2, mapping = aes(color = noreast), span = 0.2) +
# geom_smooth(color = "black") +
theme(legend.position = "none") +
labs(title = str_c(str_c(pk, collapse = ", "), " have switched w/others", sep = ""), x = "date", y = "COVID-19 deaths per million residents") +
ylim(0,7.5)
## Joining, by = "code"
## Joining, by = "name"
## `summarise()` regrouping output by 'noreast' (override with `.groups` argument)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 104 rows containing non-finite values (stat_smooth).
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 104 rows containing non-finite values (stat_smooth).
## Warning: Removed 56 rows containing missing values (geom_smooth).
## Warning: Removed 12 rows containing missing values (geom_smooth).
pop_dens <- read_csv("pop_density.csv") %>%
select(code, dens = Density, pop = Pop)
##
## -- Column specification --------------------------------------------------------
## cols(
## State = col_character(),
## code = col_character(),
## Density = col_double(),
## Pop = col_double(),
## LandArea = col_double()
## )
temp <- my_data %>%
inner_join(pop_dens) %>%
filter(as_of == max(as_of, na.rm = TRUE)) %>%
mutate(death_pm = death / pop * 1E6,
case_pm = cases / pop * 1E6,
log_death_pm = log(death_pm, 10))
## Joining, by = "code"
bestfit_intercept <- lm(formula = temp$log_death_pm ~ temp$dens)[[1]][[1]]
bestfit_slope <- lm(formula = temp$log_death_pm ~ temp$dens)[[1]][[2]]
my_data %>%
inner_join(results_2016) %>%
inner_join(pop_dens) %>%
filter(as_of == max(as_of)) %>%
mutate(death_pm = death/pop * 1000000,
log_death_pm = log(death_pm, base = 10)) %>%
group_by(code) %>%
mutate(hrc = (t_mgn < 0),
tier = case_when(dens >= 485 ~ "F",
between(dens, 183, 485) ~ "E",
between(dens, 69, 183) ~ "D",
between(dens, 26, 69) ~ "C",
between(dens, 10, 26) ~ "B",
dens < 10 ~ "A")) %>%
ungroup() %>%
ggplot(mapping = aes(x = dens, y = log_death_pm)) +
# , color = hrc, fill = hrc)) +
geom_text_repel(mapping = aes(x = dens, y = log_death_pm, label = code, fill = NULL, color = hrc), position = position_jitter()) +
# geom_smooth(linetype = 3, method = "lm", alpha = 0.1) +
geom_abline(slope = bestfit_slope, intercept = bestfit_intercept, linetype = 3) +
facet_wrap(vars(tier), scales = "free_x", nrow = 2) +
labs(x = "Population per square mile", y = "Log of COVID-19 deaths per million", title = "For many states, density is destiny", subtitle = str_c("States plotted by density and COVID-19 deaths along a best-fit line, as of ", case_growth$wk_ending[1])) +
theme(legend.position = "none", strip.text = element_blank()) +
ylim(0, 4)
## Joining, by = "code"
## Joining, by = "code"
my_data %>%
inner_join(results_2016) %>%
inner_join(pop_dens) %>%
filter(as_of == max(as_of)) %>%
mutate(death_pm = death/pop * 1000000,
log_death_pm = log(death_pm, base = 10),
log_death_pm_model = bestfit_intercept + bestfit_slope * dens,
death_pm_model = 10 ^ (log_death_pm_model),
death_pm_res_pct = (death_pm - death_pm_model) / death_pm_model * 100) %>%
ggplot(mapping = aes(x = reorder(code, desc(death_pm_res_pct)), y = death_pm_res_pct)) +
geom_label(mapping = aes(label = code, color = dens, fill = desc(dens))) +
# geom_smooth(linetype = 3, method = "lm", alpha = 0.1) +
labs(x = NULL, y = "Log of COVID-19 deaths per million", title = "For many states, density is density", subtitle = str_c("States plotted by density and COVID-19 deaths along a best-fit line, as of ", case_growth$wk_ending[1])) +
theme(legend.position = "none", strip.text = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank())
## Joining, by = "code"
## Joining, by = "code"
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
mutate(day_of_week = weekdays(as_of)) %>%
group_by(code, day_of_week) %>%
mutate(max_case_ch = max(case_ch),
hi_date = if_else(case_ch == max_case_ch, as_of, NULL),
case_ch_pm = as.integer(case_ch / pop_2019 * 1000000),
pos_rate = case_ch / test_ch) %>%
filter(hi_date == max(as_of)) %>%
select(as_of, code, case_ch_pm, pos_rate) %>%
arrange(desc(case_ch_pm)) %>%
group_by(code) %>%
summarize(as_of = max(as_of), case_ch_pm = mean(case_ch_pm, na.rm = TRUE), pos_rate = max(pos_rate, na.rm = TRUE)) %>%
ggplot(mapping = aes(x = reorder(code, desc(case_ch_pm)), y = case_ch_pm, fill = pos_rate)) +
labs(title = str_c("States with a record number of new cases week ending ", max(my_data$as_of)),
y = "Daily new cases per million",
x = "State") +
geom_col() +
geom_label(mapping = aes(label = weekdays(as_of, abbreviate = TRUE)), color = "white") +
theme(axis.text.x = element_text(angle = 90, size = 8))
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## Adding missing grouping variables: `day_of_week`
## `summarise()` ungrouping output (override with `.groups` argument)
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
mutate(day_of_week = weekdays(as_of)) %>%
group_by(code, day_of_week) %>%
mutate(max_case_ch = max(case_ch),
hi_date = if_else(case_ch == max_case_ch, as_of, NULL),
case_ch_pm = as.integer(case_ch / pop_2019 * 1000000),
pos_rate = case_ch / test_ch,
is_hrc = (t_mgn < 0)) %>%
filter(hi_date == max(as_of)) %>%
select(as_of, code, case_ch_pm, pos_rate, is_hrc) %>%
arrange(desc(case_ch_pm)) %>%
group_by(code, is_hrc) %>%
summarize(as_of = max(as_of),
case_ch_pm = mean(case_ch_pm, na.rm = TRUE),
pos_rate = max(pos_rate, na.rm = TRUE)) %>%
ggplot(mapping = aes(x = reorder(code, desc(case_ch_pm)), y = case_ch_pm, fill = is_hrc)) +
labs(title = str_c("States with a record number of new cases week ending ", max(my_data$as_of)),
y = "Daily new cases per million",
x = "State") +
geom_col(color = "white") +
geom_text(mapping = aes(label = weekdays(as_of, abbreviate = TRUE)), color = "black", angle = 90) +
theme(axis.text.x = element_text(angle = 90, size = 8))
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## Adding missing grouping variables: `day_of_week`
## `summarise()` regrouping output by 'code' (override with `.groups` argument)
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
mutate(day_of_week = weekdays(as_of)) %>%
group_by(code, day_of_week, t_mgn) %>%
mutate(max_case_ch = max(case_ch),
hi_date = if_else(case_ch == max_case_ch, as_of, NULL),
case_ch_pm = as.integer(case_ch / pop_2019 * 1000000),
pos_rate = case_ch / test_ch,
is_hrc = (t_mgn < 0)) %>%
filter(hi_date == max(as_of)) %>%
select(as_of, code, case_ch_pm, pos_rate, is_hrc) %>%
arrange(desc(case_ch_pm)) %>%
group_by(code, is_hrc) %>%
summarize(as_of = max(as_of), case_ch_pm = mean(case_ch_pm, na.rm = TRUE), pos_rate = max(pos_rate, na.rm = TRUE)) %>%
ggplot(mapping = aes(x = reorder(code, desc(case_ch_pm)), y = case_ch_pm, fill = pos_rate)) +
labs(title = str_c("45 states had a record high number of new cases this week"),
subtitle = str_c("week ending ", max(my_data$as_of)),
y = "Daily new cases per million",
x = "State") +
geom_col() +
geom_label(mapping = aes(label = weekdays(as_of, abbreviate = TRUE)), color = "white") +
theme(axis.text.x = element_text(angle = 90, size = 8))
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## Adding missing grouping variables: `day_of_week`, `t_mgn`
## `summarise()` regrouping output by 'code' (override with `.groups` argument)
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
mutate(day_of_week = weekdays(as_of)) %>%
group_by(code, day_of_week) %>%
mutate(max_death_ch = max(death_ch),
hi_date = if_else(death_ch == max_death_ch, as_of, NULL),
death_ch_pm = as.integer(death_ch / pop_2019 * 1000000),
cfr = death_ch / case_ch,
is_hrc = (t_mgn < 0)) %>%
filter(hi_date == max(as_of)) %>%
select(as_of, code, death_ch_pm, cfr, is_hrc) %>%
arrange(desc(death_ch_pm)) %>%
group_by(code, is_hrc) %>%
summarize(as_of = max(as_of), death_ch_pm = mean(death_ch_pm, na.rm = TRUE), cfr = max(cfr, na.rm = TRUE)) %>%
ggplot(mapping = aes(x = reorder(code, desc(death_ch_pm)), y = death_ch_pm, fill = is_hrc)) +
labs(title = str_c("States with a record number of deaths week ending ", max(my_data$as_of)),
y = "Daily deaths per million",
x = "State") +
geom_col() +
geom_label(mapping = aes(label = weekdays(as_of, abbreviate = TRUE)), color = "white")
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## Adding missing grouping variables: `day_of_week`
## `summarise()` regrouping output by 'code' (override with `.groups` argument)
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
mutate(year_mo = month(as_of)) %>%
group_by(year_mo, code) %>%
summarize(case_ch_pm = 30.5 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
death_ch_pm = 30.5 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
mo_of = median(ymd(str_c("2020", year_mo, "15", sep = "-")))) %>%
filter(case_ch_pm > 0, death_ch_pm > 0) %>%
group_by(year_mo) %>%
top_n(n = 10, case_ch_pm) %>%
ggplot(mapping = aes(x = mo_of, y = case_ch_pm, label = code, color = is_hrc, size = pos_rate)) +
geom_label(direction = "y", segment.color = NA, position = position_jitter(width = 10, height = 60)) +
theme(legend.position = "none") +
scale_size(range = c(3, 6)) +
labs(x = "month",
y = "monthly cases per million",
title = "Since June, red states have led in new cases per capita")
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_mo' (override with `.groups` argument)
## Warning: Ignoring unknown parameters: direction, segment.colour
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
mutate(year_mo = month(as_of)) %>%
group_by(year_mo, reg_2, code) %>%
summarize(case_ch_pm = 30.5 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
death_ch_pm = 30.5 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
mo_of = median(ymd(str_c("2020", year_mo, "15", sep = "-")))) %>%
filter(case_ch_pm > 0, death_ch_pm > 0) %>%
group_by(year_mo) %>%
top_n(n = 10, case_ch_pm) %>%
ggplot(mapping = aes(x = mo_of, y = case_ch_pm, label = code, fill = reg_2)) +
geom_label(direction = "y", segment.color = NA, position = position_jitter(width = 10, height = 60)) +
scale_size(range = c(3, 6)) +
theme(legend.title = element_blank()) +
labs(x = "month",
y = "monthly cases per million",
title = "Since June, red states have led in new cases per capita")
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_mo', 'reg_2' (override with `.groups` argument)
## Warning: Ignoring unknown parameters: direction, segment.colour
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
inner_join(pop_dens) %>%
mutate(year_wk = str_c(year(as_of), week(as_of), sep = "-")) %>%
group_by(year_wk, code, dens) %>%
summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
margin_2016 = mean(t_mgn, na.rm = TRUE),
pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
wk_date = min(as_of, na.rm = TRUE)) %>%
filter(case_ch_pm > 0, death_ch_pm > 0) %>%
group_by(wk_date) %>%
top_n(n = 3, death_ch_pm) %>%
ggplot(mapping = aes(x = wk_date, y = death_ch_pm, label = code, fill = dens)) +
geom_label(position = position_jitter(width = 10, height = 60), segment.color = NA) +
labs(title = "Less dense states have led death rates since late June",
subtitle = str_c("Three states with most per capita COVID-19 deaths by week through ", max(my_data[,as_of])),
y = "Weekly COVID-19 deaths per million residents",
x = NULL,
fill = "pop. per\nsq. mi.") +
scale_fill_gradient2(low = "dodgerblue", aesthetics = "fill", high = "orange" , midpoint = 300)
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk', 'code' (override with `.groups` argument)
## Warning: Ignoring unknown parameters: segment.colour
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
mutate(year_wk = week(as_of)) %>%
group_by(year_wk, code) %>%
summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
margin_2016 = mean(t_mgn, na.rm = TRUE),
pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
wk_date = min(as_of, na.rm = TRUE)) %>%
filter(case_ch_pm > 0, death_ch_pm > 0) %>%
group_by(wk_date) %>%
mutate(nat_death = sum(death_ch_pm)) %>%
top_n(n = 3, death_ch_pm) %>%
mutate(three_share = sum(death_ch_pm) / nat_death) %>%
ggplot(mapping = aes(x = wk_date, y = death_ch_pm, label = code, fill = margin_2016)) +
geom_label(na.rm = TRUE, position = position_jitter(width = 3, height = 25)) +
labs(subtitle = str_c("Top 3 states in per capita COVID-19 deaths each week, through ", max(my_data$as_of, na.rm = TRUE), sep = " "),
title = "States Trump won in 2016 have led death rates since late June",
y = "Weekly deaths per million residents",
x = NULL,
fill = "Trump margin\nin 2016") +
xlim(ymd(20200201), today()) +
scale_fill_gradient2(low = "dodgerblue", aesthetics = "fill", high = "pink" , midpoint = 0)
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk' (override with `.groups` argument)
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
mutate(year_wk = str_c(year(as_of), week(as_of))) %>%
group_by(year_wk, code) %>%
summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
wk_date = min(as_of, na.rm = TRUE)) %>%
filter(case_ch_pm > 0, death_ch_pm > 0) %>%
group_by(wk_date) %>%
top_n(n = 3, case_ch_pm) %>%
ggplot(mapping = aes(x = wk_date, y = case_ch_pm, label = code, fill = is_hrc)) +
geom_label(na.rm = TRUE, position = position_jitter(width = 3, height = 25)) +
theme(legend.position = "none") +
labs(title = "Since June, states that voted for President Trump have led infection rates",
subtitle = str_c("Three states with most new per capita COVID-19 cases by week through", max(my_data$as_of, na.rm = TRUE), sep = " "),
y = "Weekly new COVID-19 cases per million residents",
x = "Date") +
xlim(ymd(20200301), today())
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk' (override with `.groups` argument)
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
inner_join(pop_dens) %>%
mutate(year_wk = week(as_of)) %>%
group_by(year_wk, reg_2, code, dens) %>%
summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
wk_date = min(as_of, na.rm = TRUE)) %>%
filter(case_ch_pm > 0, death_ch_pm > 0) %>%
group_by(wk_date) %>%
top_n(n = 3, case_ch_pm) %>%
mutate(wk_dens = mean(dens, na.rm = TRUE)) %>%
ggplot(mapping = aes(x = wk_date, y = case_ch_pm)) +
geom_label(mapping = aes(label = code, fill = reg_2), na.rm = TRUE, position = position_jitter(width = 3, height = 25)) +
# geom_col(mapping = aes(y = wk_dens), alpha = .4) +
labs(subtitle = str_c("Three states with most new per capita COVID-19 cases by week through", max(my_data$as_of, na.rm = TRUE), sep = " "),
y = "Weekly new COVID-19 cases per million residents",
x = "Date",
legend = NULL
# ,caption = "Gray bars represent mean population density of top states"
) +
xlim(ymd(20200301), today()) +
theme(legend.title = element_blank())
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk', 'reg_2', 'code' (override with `.groups` argument)
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
mutate(year_wk = week(as_of)) %>%
group_by(year_wk, reg_2, code) %>%
summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
wk_date = min(as_of, na.rm = TRUE)) %>%
filter(case_ch_pm > 0, death_ch_pm > 0) %>%
group_by(wk_date) %>%
top_n(n = 3, case_ch_pm) %>%
ggplot(mapping = aes(x = wk_date, y = death_ch_pm, label = code, fill = reg_2)) +
geom_label(na.rm = TRUE, position = position_jitter(width = 3, height = 25)) +
theme(
# legend.position = "none"
legend.title = element_blank()) +
labs(subtitle = str_c("Top 3 states in per capita COVID-19 deaths by week, through ", max(my_data$as_of, na.rm = TRUE), sep = " "),
y = "Weekly COVID-19 deaths per million residents",
x = "Date") +
xlim(ymd(20200301), today())
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk', 'reg_2' (override with `.groups` argument)
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
mutate(year_wk = week(as_of)) %>%
group_by(year_wk, reg_2, code) %>%
summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
wk_date = min(as_of, na.rm = TRUE)) %>%
filter(case_ch_pm > 0, death_ch_pm > 0) %>%
group_by(wk_date) %>%
top_n(n = 5, case_ch_pm) %>%
ggplot(mapping = aes(x = wk_date, y = case_ch_pm, label = code, fill = reg_2)) +
geom_label(na.rm = TRUE, position = position_jitter(width = 3, height = 25)) +
theme(legend.title = element_blank()) +
labs(subtitle = str_c("Five states with most per capita COVID-19 cases through", max(my_data$as_of, na.rm = TRUE), sep = " "),
y = "Weekly COVID-19 cases per million residents",
x = "Date") +
xlim(ymd(20200301), today())
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk', 'reg_2' (override with `.groups` argument)
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
inner_join(pop_dens) %>%
mutate(year_wk = week(as_of)) %>%
group_by(year_wk, reg_2, code, dens) %>%
summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
wk_date = min(as_of, na.rm = TRUE)) %>%
filter(case_ch_pm > 0, death_ch_pm > 0) %>%
group_by(wk_date) %>%
top_n(n = 3, case_ch_pm) %>%
ggplot(mapping = aes(x = wk_date, y = death_ch_pm, label = code, fill = dens)) +
geom_label(na.rm = TRUE, position = position_jitter(width = 3, height = 25), color = "black") +
# theme(legend.position = "none") +
labs(subtitle = str_c("Three states with most weekly per capita COVID-19 deaths through", max(my_data$as_of, na.rm = TRUE), sep = " "),
y = "Weekly COVID-19 deaths per million residents",
x = "Date",
fill = "pop. per\nsq. mi.") +
xlim(ymd(20200301), today()) +
theme_bw() +
scale_fill_gradientn(colors = c("orange", "white", "dodgerblue"))
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk', 'reg_2', 'code' (override with `.groups` argument)
Determine weekday factors
wday_fact <- my_data %>%
filter(as_of >= today() - months(1)) %>%
mutate(wk_num = week(as_of),
wkday = lubridate::wday(as_of),
case_ch = if_else(case_ch == 0, 1L, case_ch),
death_ch = if_else(death_ch == 0, 1L, death_ch)) %>%
group_by(code, wk_num) %>%
mutate(wday_case_local = case_ch / mean(case_ch, na.rm = TRUE),
wday_death_local = death_ch / mean(death_ch, na.rm = TRUE)) %>%
ungroup() %>%
group_by(code, wkday) %>%
summarize(wday_case_global = mean(wday_case_local, na.rm = TRUE),
wday_death_global = mean(wday_death_local, na.rm = TRUE))
## `summarise()` regrouping output by 'code' (override with `.groups` argument)
my_data %>%
mutate(wkday = lubridate::wday(as_of)) %>%
inner_join(wday_fact) %>%
filter(code == "CA", as_of > today() - weeks(2)) %>%
mutate(case_ch_adj = case_ch / wday_case_global,
death_ch_adj = death_ch / wday_death_global,
t_day = wkday + 1) %>%
mutate(day_name = lubridate::wday(x = t_day, label = TRUE)) %>%
ggplot(mapping = aes(x = as_of)) +
geom_line(mapping = aes(y = case_ch), color = "gray", size = 3) +
geom_line(mapping = aes(y = case_ch_adj))
## Joining, by = c("code", "wkday")
my_data %>%
inner_join(wday_fact) %>%
inner_join(results_2016) %>%
group_by(code) %>%
mutate(case_ch_adj = wday_case_global * case_ch,
death_ch_adj = wday_death_global * death_ch,
t_day = wkday + 1) %>%
filter(case_ch_adj == max(case_ch_adj, na.rm = TRUE) &
as_of == max(as_of)) %>%
mutate(day_name = lubridate::wday(x = t_day, label = TRUE)) %>%
ggplot(mapping = aes(x = code, y = case_ch_adj)) +
geom_col() +
geom_label(mapping = aes(label = day_name)) +
labs(subtitle = "States with record adjusted new cases reported today")
## Joining, by = "code"
## Joining, by = "code"
my_data %>%
inner_join(wday_fact) %>%
inner_join(results_2016) %>%
group_by(code) %>%
mutate(case_ch_adj = wday_case_global * case_ch,
death_ch_adj = wday_death_global * death_ch) %>%
filter(death_ch_adj == max(death_ch_adj, na.rm = TRUE) &
as_of == max(as_of)) %>%
mutate(day_name = lubridate::wday(wkday + 1, label = TRUE)) %>%
ggplot(mapping = aes(x = code, y = death_ch_adj)) +
geom_col() +
geom_label(mapping = aes(label = day_name)) +
labs(subtitle = "States with record adjusted deaths reported today")
## Joining, by = "code"
## Joining, by = "code"
pk <- c("CA", "CT", "MI", "NJ", "NY", "TX", "FL")
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
mutate(year_wk = week(as_of)) %>%
filter(code %in% pk) %>%
group_by(year_wk, code) %>%
summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
wk_date = min(as_of, na.rm = TRUE)) %>%
filter(case_ch_pm > 0, death_ch_pm > 0) %>%
ggplot(mapping = aes(x = wk_date, y = death_ch_pm, label = code, fill = code, color = code)) +
labs(subtitle = str_c("Compare some states through", max(my_data$as_of, na.rm = TRUE), sep = " "),
y = "Weekly COVID-19 deaths per million residents",
x = "Date") +
xlim(ymd(20200301), today()) +
geom_smooth(alpha = 0.1, span = .2) +
theme_fivethirtyeight() +
theme(legend.title = element_blank())
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk' (override with `.groups` argument)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
nation_wk <- my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
mutate(year_wk = week(as_of)) %>%
# filter(code %in% c("CA")) %>%
group_by(year_wk) %>%
summarize(case_ch_pm = 7 * mean(sum(case_ch) / sum(pop_2019) * 1000000, na.rm = TRUE),
death_ch_pm = 7 * mean(sum(death_ch) / sum(pop_2019) * 1000000, na.rm = TRUE),
test_ch_pm = 7 * mean(sum(test_ch) / sum(pop_2019) * 1000000, na.rm = TRUE),
pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
wk_date = min(as_of, na.rm = TRUE)) %>%
filter(case_ch_pm > 0, death_ch_pm > 0)
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` ungrouping output (override with `.groups` argument)
nation_wk %>%
ggplot(mapping = aes(x = wk_date, y = case_ch_pm)) +
labs(subtitle = str_c("Per capita COVID-19 cases by week through", max(my_data$as_of, na.rm = TRUE), sep = " "),
y = "Weekly COVID-19 cases per million residents",
x = "Date") +
xlim(ymd(20200301), today()) +
geom_smooth(span = 0.2, na.rm = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
nation_wk %>%
ggplot(mapping = aes(x = wk_date, y = death_ch_pm)) +
labs(subtitle = str_c("Per capita COVID-19 deaths by week through", max(my_data$as_of, na.rm = TRUE), sep = " "),
y = "Weekly COVID-19 deaths per million residents",
x = "Date") +
xlim(ymd(20200301), today()) +
geom_line(na.rm = TRUE)
my_data %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
inner_join(results_2016) %>%
filter(lubridate::wday(as_of) == lubridate::wday(max(as_of))) %>%
# filter(code %in% c("CA")) %>%
group_by(as_of) %>%
summarize(case_ch_pm = 7 * mean(sum(case_ch) / sum(pop_2019) * 1000000, na.rm = TRUE),
death_ch_pm = 7 * mean(sum(death_ch) / sum(pop_2019) * 1000000, na.rm = TRUE),
test_ch_pm = 7 * mean(sum(test_ch) / sum(pop_2019) * 1000000, na.rm = TRUE),
pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE)) %>%
filter(case_ch_pm > 0, death_ch_pm > 0) %>%
ggplot(mapping = aes(x = as_of, y = death_ch_pm)) +
labs(subtitle = str_c("Per capita COVID-19 deaths by week through", max(my_data$as_of, na.rm = TRUE), sep = " "),
y = "Weekly COVID-19 deaths per million residents",
x = "Date") +
xlim(ymd(20200301), today()) +
geom_smooth(na.rm = TRUE, color = "dodgerblue", span = 0.15)
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` ungrouping output (override with `.groups` argument)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
cor_data <- my_data %>%
select(as_of, code, case_ch, death_ch) %>%
filter(as_of > max(as_of) - months(6))
j <- data.table(code = character(), d_offset = integer(), cor_offset = double(), cfr = double())
st_vector <- code_names %>% select(code) %>% arrange(code) %>% pull(code)
for (k in st_vector) {
for (i in 10:40) {
j <- rbind(j,
data.table(code = k,
d_offset = i,
cor_offset = cor_data %>%
filter(code == k) %>%
mutate(d_lag = as_of + days(i)) %>%
left_join(cor_data[code == k], by = c("d_lag" = "as_of")) %>%
pull(-1) %>%
cor(x = ., y = cor_data[code == k]$case_ch, use = "complete.obs"),
cfr = (cor_data[code == k & between(as_of, max(as_of) - 14, max(as_of))]$death) / (cor_data[code == k & between(as_of, max(as_of) -14 - i, max(as_of) - i)]$case_ch)))
}
}
case_death <- j %>%
group_by(code) %>%
arrange(desc(cor_offset)) %>%
slice_head()
my_data %>%
inner_join(case_death) %>%
inner_join(code_names) %>%
inner_join(st_pop_2019) %>%
group_by(code) %>%
filter(between(as_of, max(as_of, na.rm = TRUE) - d_offset + 1, max(as_of, na.rm = TRUE) - d_offset + 14)) %>%
mutate(as_of_proj = as_of + d_offset,
wk_proj = if_else(as_of <= max(as_of, na.rm = TRUE) - 7, str_c("Week starting ", max(my_data$as_of) + days(1)), str_c("Week starting ", max(my_data$as_of) + days(8))),
death_pm_proj_day = case_ch * cfr / pop_2019 * 1E6,
two_wk = sum(death_pm_proj_day)) %>%
ungroup() %>%
group_by(code, wk_proj, two_wk) %>%
summarize(death_pm_proj_wk = sum(death_pm_proj_day, na.rm = TRUE)) %>%
ggplot(mapping = aes(y = death_pm_proj_wk, x = wk_proj, label = code)) +
geom_label_repel(color = "chocolate4", fill = "forestgreen")
## Joining, by = "code"
## Joining, by = "code"
## Joining, by = "name"
## `summarise()` regrouping output by 'code', 'wk_proj' (override with `.groups` argument)
case_death %>%
ggplot(mapping = aes(x = cfr)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).
lag_4wk_data <- my_data %>%
select(as_of_lag_4wk = as_of, code, case_ch, test_ch) %>%
filter(case_ch * test_ch > 0,
test_ch >= case_ch) %>%
mutate(pos_rate_4wk = case_ch / test_ch) %>%
filter(pos_rate_4wk < 0.5) %>%
select(-test_ch, -case_ch)
## Warning in case_ch * test_ch: NAs produced by integer overflow
lag_3wk_data <- my_data %>%
select(as_of_lag_3wk = as_of, code, case_ch_3wk = case_ch) %>%
filter(case_ch_3wk > 0)
cor_data <- my_data %>%
select(as_of, code, death_ch) %>%
filter(as_of > max(as_of) - months(6)) %>%
mutate(as_of_lag_4wk = as_of - weeks(4),
as_of_lag_3wk = as_of - weeks(3)) %>%
inner_join(lag_4wk_data) %>%
inner_join(lag_3wk_data)
## Joining, by = c("code", "as_of_lag_4wk")
## Joining, by = c("code", "as_of_lag_3wk")
lag_1wk_data <- my_data %>%
select(as_of_lag_1wk = as_of, code, case_ch, test_ch) %>%
filter(case_ch > 0,
test_ch > 0,
test_ch >= case_ch) %>%
mutate(pos_rate_lag_1wk = case_ch / test_ch) %>%
filter(pos_rate_lag_1wk < 0.5,
!is.na(pos_rate_lag_1wk)) %>%
select(-test_ch, -case_ch)
lead_3wk_data <- my_data %>%
select(as_of_lead_3wk = as_of, code,
death_ch_lead_3wk = death_ch) %>%
filter(death_ch_lead_3wk > 0)
cor_data <- my_data %>%
select(as_of, code, case_ch) %>%
filter(as_of > max(as_of) - months(10)) %>%
mutate(as_of_lag_1wk = as_of - weeks(1),
as_of_lead_3wk = as_of + weeks(3)) %>%
inner_join(lag_1wk_data) %>%
left_join(lead_3wk_data) %>%
arrange(desc(as_of_lead_3wk))
## Joining, by = c("code", "as_of_lag_1wk")
## Joining, by = c("code", "as_of_lead_3wk")
# cor_data[code == "CA" & as_of > ymd(20201215)] %>% ggplot(aes(pos_rate_lag_1wk)) + geom_histogram()
# cor_data[code == "CA"] %>% ggplot(aes(as_of, pos_rate_lag_1wk)) + geom_line()
#
# cor_data %>%
# filter(code == 'CA',
# as_of > ymd(20201201)) %>%
# ggplot() +
# geom_abline(slope = lm(cor_data[code == 'CA', death_ch_lead_3wk] ~ 0 + cor_data[code == 'CA', cor_data[code == 'CA', case_ch]])[[1]][[1]]) +
# geom_abline(slope = lm(cor_data[code == 'CA', death_ch_lead_3wk] ~ 0 + cor_data[code == 'CA', cor_data[code == 'CA', case_ch]][[1]][[1]] + cor_data[code == 'CA', cor_data[code == 'CA', case_ch]])[[1]][[1]])
#
#
# lm(cor_data[code == 'CA', death_ch_lead_3wk])
#
#
#
# cor_data %>%
# filter(code == 'CA',
# pos_rate_lag_1wk > 0.2) %>%
# lm(formula = death_ch_lead_3wk ~ 0 + case_ch) %>%
# .[[1]]
#
# cor_data %>%
# filter(code == 'CA',
# pos_rate_lag_1wk > 0.2) %>%
# lm(formula = death_ch_lead_3wk ~ 0 + case_ch + pos_rate_lag_1wk) %>%
# .[[1]]
#
# (1006 - 0103) / 103
m_data <- data.table(survey_date = ymd(c(19660801, 19650501, 19640801, 19630501)), tot_fav = c(.33, .45, .44, .41), tot_unfav = c(.63, .46, .38, .37))
m_data %>%
ggplot(mapping = aes(survey_date)) +
geom_line(aes(y = tot_fav), color = "dodgerblue", size = 1.25) +
geom_line(aes(y = tot_unfav), color = "darkgray", size = 1.25) +
geom_label(aes(x = unique(m_data[year(survey_date) == 1964, survey_date]), y = unique(m_data[year(survey_date) == 1964, tot_fav]), label = "Favorable"), color = "dodgerblue") +
geom_label(aes(x = m_data[year(survey_date) == 1965, survey_date], y = m_data[year(survey_date) == 1965, tot_unfav], label = "Unfavorable"), color = "dark gray") +
geom_hline(aes(yintercept = .94), color = "lightblue", size = 1.25, linetype = "dashed") + geom_label(x = ymd(19650601), y = .94, label = "Favorable in Aug. 2011", color = "dodgerblue") +
geom_hline(aes(yintercept = .04), color = "gray", size = 1.25, linetype = "dashed") + geom_label(x = ymd(19650601), y = .04, label = "Unfavorable in Aug. 2011", color = "darkgray") +
labs(x = "Date",
y = "Share of respondents by answer",
caption = "Source: Contemporary poll data from Gallup\nhttps://tinyurl.com/y3zgrl5a",
title = "While he lived, the public viewed MLK increasingly less favorably ")